TO DO LIST
- reduce the number of genres -> produce a list
library(ggplot2)
library(tidyverse)
── Attaching core tidyverse packages ─────────────────── tidyverse 2.0.0 ──
✔ forcats 1.0.0 ✔ stringr 1.5.0
✔ lubridate 1.9.2 ✔ tibble 3.2.1
✔ purrr 1.0.1 ✔ tidyr 1.3.0
✔ readr 2.1.4 ── Conflicts ───────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(dplyr)
Explore columns
# Print each column's position and name, followed by its first ten values,
# as a quick inventory of what `tracks` contains.
# seq_len(ncol(.)) gives an empty loop for a zero-column table, whereas
# 1:dim(tracks)[2] would count down through c(1, 0).
for (i in seq_len(ncol(tracks))) {
  print(paste0(i, "----", colnames(tracks)[i]))
  print(tracks[1:10, i])
}
[1] "1----artists"
[1] "Nayt" "Mahmood"
[3] "NASKA" "NASKA"
[5] "Bresh" "Marracash"
[7] "Rino Gaetano" "NASKA"
[9] "Pinguini Tattici Nucleari" "Marracash"
[1] "2----album_name"
[1] "Un bacio (Deluxe Edition)" "Brividi"
[3] "REBEL" "REBEL"
[5] "ORO BLU" "NOI, LORO, GLI ALTRI"
[7] "Q Concert" "REBEL (Deluxe)"
[9] "Giovani Wannabe" "NOI, LORO, GLI ALTRI"
[1] "3----year"
[1] 2016 2022 2022 2022 2022 2021 1981 2022 2022 2021
[1] "4----season"
[1] "Spring" "Winter" "Spring" "Spring" "Spring" "Autumn" "missing"
[8] "Autumn" "Spring" "Autumn"
[1] "5----popularity"
[1] 32 65 53 46 59 64 46 55 63 66
[1] "6----acousticness"
[1] 0.50300 0.44800 0.02820 0.00162 0.36900 0.48500 0.05690 0.01360
[9] 0.04090 0.57000
[1] "7----danceability"
[1] 0.792 0.523 0.455 0.594 0.489 0.614 0.566 0.405 0.739 0.727
[1] "8----energy"
[1] 0.631 0.614 0.677 0.920 0.367 0.464 0.376 0.616 0.810 0.698
[1] "9----instrumentalness"
[1] 0 0 0 0 0 0 0 0 0 0
[1] "10----liveness"
[1] 0.1420 0.2540 0.5640 0.1590 0.1980 0.0812 0.4740 0.1090 0.1220 0.1100
[1] "11----loudness"
[1] -11.908 -4.435 -5.041 -5.566 -10.254 -8.473 -11.643 -6.442
[9] -5.317 -9.676
[1] "12----speechiness"
[1] 0.0992 0.0347 0.0338 0.0519 0.0878 0.0803 0.0443 0.0347 0.0311 0.3650
[1] "13----tempo"
[1] 108.076 122.962 179.981 139.929 131.302 152.092 176.366 160.219
[9] 127.972 92.915
[1] "14----key"
[1] 10 7 4 2 0 9 0 1 11 8
[1] "15----mode"
[1] 1 1 1 1 1 1 1 1 0 1
[1] "16----duration_ms"
[1] 141111 199146 219846 170388 231602 195799 217629 179815 212966 234446
[1] "17----valence"
[1] 0.805 0.342 0.266 0.492 0.524 0.293 0.486 0.199 0.948 0.356
[1] "18----time_signature"
[1] 4 4 4 4 4 4 4 3 4 4
[1] "19----track.id"
[1] "3o5AiG9Omh5GWlBNKEVcA9" "1ZMGp9MTXbtAPvcKa0U3zS"
[3] "6fPGBlx8wsAxhoDn7BwiAH" "75U0n5xhZT3al2oC3I61rG"
[5] "6I28wnb48iMVVfyTSf4lkx" "0WgVvy1KelQxG6KBUukTWI"
[7] "5mA4wMDUbf9A2N0vzCR80R" "0jkBVkeS6L5NtvPn29NeIK"
[9] "7iLuBTHJSXM2HalKHFqEEy" "4gxRyOZefp95AXZFaztdtO"
[1] "20----genre_1"
[1] "italian hip hop" "italian adult pop" "emo rap italiano"
[4] "emo rap italiano" "italian hip hop" "italian hip hop"
[7] "canzone d'autore" "emo rap italiano" "bergamo indie"
[10] "italian hip hop"
[1] "21----genre_2"
[1] "italian" "italian" "italian" "italian" "italian" "italian" NA
[8] "italian" NA "italian"
Ranges
# For the columns at positions 5 to 18 of `tracks`, print the position, the
# column name and the observed minimum and maximum. c() coerces everything
# to character, so the values are shown as strings.
for (col_idx in 5:18) {
  col_range <- range(tracks[, col_idx])
  print(c(col_idx, colnames(tracks)[col_idx], col_range))
}
[1] "5" "popularity" "19" "95"
[1] "6" "acousticness" "0.000519" "0.921"
[1] "7" "danceability" "0.352" "0.877"
[1] "8" "energy" "0.228" "0.968"
[1] "9" "instrumentalness" "0"
[4] "0.014"
[1] "10" "liveness" "0.0344" "0.667"
[1] "11" "loudness" "-14.291" "-2.363"
[1] "12" "speechiness" "0.0261" "0.365"
[1] "13" "tempo" "74.836" "197.773"
[1] "14" "key" "0" "11"
[1] "15" "mode" "0" "1"
[1] "16" "duration_ms" "137562" "326893"
[1] "17" "valence" "0.0397" "0.948"
[1] "18" "time_signature" "3" "5"
# Column positions used as numeric features: 5 to 15, then 17 and 18
# (position 16 is left out here).
numerical_values <- c(5:15, 17, 18)
length(numerical_values)
[1] 13
distributions
# Draw a 30-bin histogram for each column at positions 5 to 18 of `tracks`.
numerical_values <- 5:18
for (col in colnames(tracks)[numerical_values]) {
  # .data[[col]] looks the column up by its string name inside aes().
  hist_plot <- ggplot(data = tracks, mapping = aes(x = .data[[col]])) +
    geom_histogram(bins = 30, fill = "blue", color = "black", alpha = 0.7) +
    labs(title = col, x = "", y = "Frequency") +
    theme_bw()
  # Nothing auto-prints inside a for loop, so the plot is printed explicitly.
  print(hist_plot)
}














# Add a column indicating the source data frame, plus one survey answer, to
# every data frame in dataset_list so rows stay identifiable once stacked.
df_list_with_names <- lapply(seq_along(dataset_list), function(i) {
  tagged <- dataset_list[[i]]
  tagged$id <- names(dataset_list)[i]
  # here you can add all kind of columns from the survey dataset
  # tagged$study <- dataset_survey[i, "Che.cosa.studi."]
  # NOTE(review): assumes row i of dataset_survey belongs to
  # dataset_list[[i]] -- confirm both are in the same order.
  tagged$state <- dataset_survey[i, "In.che.stato.vivi."]
  tagged
})

# Combine the data frames into a single data frame
df_all_in_one <- bind_rows(df_list_with_names)

# Print every 50th row among the first 1000 as a preview. Positions beyond
# the last row are dropped so a table shorter than that is not padded with
# NA rows.
preview_rows <- seq(1, 1000, by = 50)
preview_rows <- preview_rows[preview_rows <= nrow(df_all_in_one)]
print(df_all_in_one[preview_rows, ])

# Drop the first column by position.
df_all_in_one <- df_all_in_one[, -1]
# Count the rows per value of genre_2 (table() leaves NA out by default).
table(df_all_in_one[["genre_2"]])
alternative brighton classical hip house indie italian
45 1 57 142 23 88 2149
italiana italiano missing pop rock
53 98 15 813 173
# Pairwise scatter plots of the selected columns of df_all_in_one, coloured
# by `state`. Positions 9 and 10 are left out of the selection.
# Logical negation replaces -which(): when nothing matches, -which() yields
# an empty index and would silently drop every position.
numerical_values_new <- numerical_values[!numerical_values %in% c(9, 10)]
columns <- colnames(df_all_in_one)[numerical_values_new]
# columns <- colnames(tracks)[7:10]

# combn() needs at least two columns; with fewer there is nothing to pair
# (the index form 1:(length(columns) - 1) would iterate over c(1, 0)).
if (length(columns) >= 2) {
  # Pairs come out in the same order as a nested i < j loop.
  for (pair in combn(columns, 2, simplify = FALSE)) {
    # .data[[name]] maps each column by name from the plot's own data rather
    # than extracting vectors from the data frame inside aes().
    scatter <- ggplot(
      df_all_in_one,
      aes(.data[[pair[1]]], .data[[pair[2]]], colour = state)
    ) +
      geom_point() +
      theme_bw() +
      theme(legend.position = "none") +
      labs(x = pair[1], y = pair[2])
    print(scatter)
  }
}


































































Blueprint for plots

LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQojIFRPIERPIExJU1QNCi0gcmlkdWNlIHRoZSBudW1iZXIgb2YgZ2VucmVzLT4gcHJvZHVjZSBhIGxpc3QNCg0KYGBge3J9DQpsaWJyYXJ5KGdncGxvdDIpDQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmxpYnJhcnkobHVicmlkYXRlKQ0KbGlicmFyeShkcGx5cikNCmBgYA0KDQpgYGB7cn0NCmRhdGFzZXRfbGlzdCA9IHJlYWRSRFMoIi4uLy4uL2RhdGEvZGF0YV9nZW5yZXNfY2xlYW5lZC5SRGF0YSIpDQp0cmFja3MgPSBkYXRhc2V0X2xpc3RbWzFdXQ0KZGltKHRyYWNrcykNCmhlYWQodHJhY2tzKQ0KdHJhY2tzID0gdHJhY2tzWywtMV0NCmBgYA0KDQoNCiMgRXhwbG9yZSBjb2x1bW5zDQpgYGB7cn0NCmZvciAoaSBpbiAxOmRpbSh0cmFja3MpWzJdKXsNCiAgcHJpbnQocGFzdGUwKGksIi0tLS0iLGNvbG5hbWVzKHRyYWNrcylbaV0pKQ0KICBwcmludCh0cmFja3NbMToxMCxpXSkNCn0NCmBgYA0KIyBSYW5nZXMNCmBgYHtyfQ0KbnVtZXJpY2FsX3ZhbHVlcyA9IDU6MTgNCmZvciAoaSBpbiBudW1lcmljYWxfdmFsdWVzKXsNCiAgcHJpbnQoYyhpLGNvbG5hbWVzKHRyYWNrcylbaV0scmFuZ2UodHJhY2tzWyxpXSkpKQ0KfQ0KbGVuZ3RoKG51bWVyaWNhbF92YWx1ZXMpDQpgYGANCg0KDQojIGRpc3RyaWJ1dGlvbnMNCmBgYHtyfQ0KDQpmb3IoY29sIGluIGNvbG5hbWVzKHRyYWNrcylbbnVtZXJpY2FsX3ZhbHVlc10pew0KICBiaW5zID0gcmFuZ2UodHJhY2tzW1tjb2xdXSlbMl0tcmFuZ2UodHJhY2tzW1tjb2xdXSlbMV0NCiAgcGxvdCA8LSBnZ3Bsb3QoZGF0YSA9IHRyYWNrcywgbWFwcGluZyA9IGFlcyh4ID0uZGF0YVtbY29sXV0pKSArDQogIGdlb21faGlzdG9ncmFtKGJpbnMgPTMwLCBmaWxsID0gImJsdWUiLCBjb2xvciA9ICJibGFjayIsIGFscGhhID0gMC43KSArDQogIGxhYnModGl0bGUgPSBjb2wsIHggPSAiIiwgeSA9ICJGcmVxdWVuY3kiKSArDQogIHRoZW1lX2J3KCkNCiAgDQogIHByaW50KHBsb3QpDQp9DQoNCmBgYA0KDQoNCg0KDQpgYGB7cn0NCiMgQWRkIGEgY29sdW1uIGluZGljYXRpbmcgdGhlIHNvdXJjZSBkYXRhIGZyYW1lDQpkZl9saXN0X3dpdGhfbmFtZXMgPC0gbGFwcGx5KHNlcV9hbG9uZyhkYXRhc2V0X2xpc3QpLCBmdW5jdGlvbihpKSB7DQogIGRhdGFzZXRfbGlzdFtbaV1dJGlkIDwtIG5hbWVzKGRhdGFzZXRfbGlzdClbaV0NCiAgIyBoZXJlIHlvdSBjYW4gYWRkIGFsbCBraW5kIG9mIGNvbHVtbnMgZnJvbSB0aGUgc3VydmV5IGRhdGFzZXQNCiAgI2RhdGFzZXRfbGlzdFtbaV1dJHN0dWR5IDwtIGRhdGFzZXRfc3VydmV5W2ksIkNoZS5jb3NhLnN0dWRpLiJdDQogIGRhdGFzZXRfbGlzdFtbaV1dJHN0YXRlIDwtIGRhdGFzZXRfc3VydmV5W2ksIkluLmNoZS5zdGF0by52aXZpLiJdDQogIHJldHVybihkYXRhc2V0X2xpc3RbW2ldXSkNCn0pDQoNCiMgQ29tYmluZSB0aGUgZGF0YSBmcmFtZXMgaW50byBhIHNpbmdsZSBkYXRh
IGZyYW1lDQpkZl9hbGxfaW5fb25lIDwtIGJpbmRfcm93cyhkZl9saXN0X3dpdGhfbmFtZXMpDQoNCiMgUHJpbnQgdGhlIHJlc3VsdA0KcHJpbnQoZGZfYWxsX2luX29uZVtzZXEoMSwxMDAwLDUwKSxdKQ0KZGZfYWxsX2luX29uZT1kZl9hbGxfaW5fb25lWywtMV0NCmBgYA0KYGBge3J9DQp0YWJsZShkZl9hbGxfaW5fb25lJGdlbnJlXzIpDQpgYGANCg0KDQoNCg0KYGBge3J9DQpudW1lcmljYWxfdmFsdWVzX25ldyA9IG51bWVyaWNhbF92YWx1ZXNbLXdoaWNoKG51bWVyaWNhbF92YWx1ZXMlaW4lYyg5LDEwKSldDQpjb2x1bW5zID0gY29sbmFtZXMoZGZfYWxsX2luX29uZSlbbnVtZXJpY2FsX3ZhbHVlc19uZXddDQojY29sdW1ucyA9Y29sbmFtZXModHJhY2tzKVs3OjEwXQ0KZm9yKGkgaW4gMToobGVuZ3RoKGNvbHVtbnMpLTEpKXsNCiAgZm9yKGogaW4gKGkrMSk6bGVuZ3RoKGNvbHVtbnMpKXsNCiAgICBwbG90ID0gZ2dwbG90KGRmX2FsbF9pbl9vbmUsIGFlcyhkZl9hbGxfaW5fb25lWyxjb2x1bW5zW2ldXSxkZl9hbGxfaW5fb25lWyxjb2x1bW5zW2pdXSAsIGNvbG91ciA9IHN0YXRlICkpICsgDQogICAgICAgIGdlb21fcG9pbnQoKSsNCiAgICAgICAgdGhlbWVfYncoKSsNCiAgICAgICAgdGhlbWUobGVnZW5kLnBvc2l0aW9uID0gIm5vbmUiKSsNCiAgICAgICAgbGFicyh4ID1jb2x1bW5zW2ldLHkgPSBjb2x1bW5zW2pdICkNCiAgICBwcmludChwbG90KQ0KICB9DQp9DQpgYGANCg0KDQojIEJsdWVwcmludCBmb3IgcGxvdHMgDQpgYGB7cn0NCnBsb3QgPSBnZ3Bsb3QodHJhY2tzLCBhZXMoZW5lcmd5LGxvdWRuZXNzICwgY29sb3VyID0gZ2VucmVfMSApKSArIA0KICBnZW9tX3BvaW50KCkrDQogIHRoZW1lX2J3KCkrDQogIHRoZW1lKGxlZ2VuZC5wb3NpdGlvbiA9ICJub25lIikNCg0KcHJpbnQocGxvdCkNCmBgYA0KDQoNCg0KDQoNCg0KDQoNCg0KDQo=